/* Grapheme cluster break function.
   Copyright (C) 2010-2025 Free Software Foundation, Inc.

   This file is free software.
   It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
   You can redistribute it and/or modify it under either
     - the terms of the GNU Lesser General Public License as published
       by the Free Software Foundation, either version 3, or (at your
       option) any later version, or
     - the terms of the GNU General Public License as published by the
       Free Software Foundation; either version 2, or (at your option)
       any later version, or
     - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License and the GNU General Public License
   for more details.

   You should have received a copy of the GNU Lesser General Public
   License and of the GNU General Public License along with this
   program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Ben Pfaff, Daiki Ueno, Bruno Haible.  */

/* This file implements section 3 "Grapheme Cluster Boundaries"
   of Unicode Standard Annex #29 <https://www.unicode.org/reports/tr29/>.  */

const UNIT *
FUNC (const UNIT *s, const UNIT *s_end)
{
  if (s == s_end)
    return NULL;

  /* Grapheme Cluster break property of the last character.
     -1 at the very beginning of the string.  */
  int last_char_prop = -1;

  /* True if the last character ends a sequence of Indic_Conjunct_Break
     values:  consonant {extend|linker}*  */
  bool incb_consonant_extended = false;
  /* True if the last character ends a sequence of Indic_Conjunct_Break
     values:  consonant {extend|linker}* linker  */
  bool incb_consonant_extended_linker = false;
  /* True if the last character ends a sequence of Indic_Conjunct_Break
     values:  consonant {extend|linker}* linker {extend|linker}*  */
  bool incb_consonant_extended_linker_extended = false;

  /* True if the last character ends an emoji modifier sequence
     \p{Extended_Pictographic} Extend*.  */
  bool emoji_modifier_sequence = false;
  /* True if the last character was immediately preceded by an
     emoji modifier sequence   \p{Extended_Pictographic} Extend*.  */
  bool emoji_modifier_sequence_before_last_char = false;

  /* Number of consecutive regional indicator (RI) characters seen
     immediately before the current point.  */
  size_t ri_count = 0;

  do
    {
      ucs4_t uc;
      int count = U_MBTOUC (&uc, s, s_end - s);
      int prop = uc_graphemeclusterbreak_property (uc);
      int incb = uc_indic_conjunct_break (uc);

      /* Break at the start of the string (GB1).  */
      if (last_char_prop < 0)
        /* *p = 1 */;
      else
        {
          /* No break between CR and LF (GB3).  */
          if (last_char_prop == GBP_CR && prop == GBP_LF)
            /* *p = 0 */;
          /* Break before and after newlines (GB4, GB5).  */
          else if ((last_char_prop == GBP_CR
                    || last_char_prop == GBP_LF
                    || last_char_prop == GBP_CONTROL)
                   || (prop == GBP_CR
                       || prop == GBP_LF
                       || prop == GBP_CONTROL))
            break /* *p = 1 */;
          /* No break between Hangul syllable sequences (GB6, GB7, GB8).  */
          else if ((last_char_prop == GBP_L
                    && (prop == GBP_L
                        || prop == GBP_V
                        || prop == GBP_LV
                        || prop == GBP_LVT))
                   || ((last_char_prop == GBP_LV
                        || last_char_prop == GBP_V)
                       && (prop == GBP_V
                           || prop == GBP_T))
                   || ((last_char_prop == GBP_LVT
                        || last_char_prop == GBP_T)
                       && prop == GBP_T))
            /* *p = 0 */;
          /* No break before extending characters or ZWJ (GB9).  */
          else if (prop == GBP_EXTEND || prop == GBP_ZWJ)
            /* *p = 0 */;
          /* No break before SpacingMarks (GB9a).  */
          else if (prop == GBP_SPACINGMARK)
            /* *p = 0 */;
          /* No break after Prepend characters (GB9b).  */
          else if (last_char_prop == GBP_PREPEND)
            /* *p = 0 */;
          /* No break within certain combinations of Indic_Conjunct_Break
             values: Between
               consonant {extend|linker}* linker {extend|linker}*
             and
               consonant
             (GB9c).  */
          else if (incb_consonant_extended_linker_extended
                   && incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT)
            /* *p = 0 */;
          /* No break within emoji modifier sequences or emoji zwj sequences
             (GB11).  */
          else if (last_char_prop == GBP_ZWJ
                   && emoji_modifier_sequence_before_last_char
                   && uc_is_property_extended_pictographic (uc))
            /* *p = 0 */;
          /* No break between RI if there is an odd number of RI
             characters before (GB12, GB13).  */
          else if (prop == GBP_RI && (ri_count % 2) != 0)
            /* *p = 0 */;
          /* Break everywhere (GB999).  */
          else
            break /* *p = 1 */;
        }

      incb_consonant_extended_linker =
        incb_consonant_extended && incb == UC_INDIC_CONJUNCT_BREAK_LINKER;
      incb_consonant_extended_linker_extended =
        (incb_consonant_extended_linker
         || (incb_consonant_extended_linker_extended
             && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));
      incb_consonant_extended =
        (incb == UC_INDIC_CONJUNCT_BREAK_CONSONANT
         || (incb_consonant_extended
             && incb >= UC_INDIC_CONJUNCT_BREAK_LINKER));

      emoji_modifier_sequence_before_last_char = emoji_modifier_sequence;
      emoji_modifier_sequence =
        (emoji_modifier_sequence && prop == GBP_EXTEND)
        || uc_is_property_extended_pictographic (uc);

      last_char_prop = prop;

      if (prop == GBP_RI)
        ri_count++;
      else
        ri_count = 0;

      s += count;
    }
  while (s < s_end);

  return s;
}
